/*
Input: 	
	> newdata/estimation_prep_ltw18_voter [prepared voter-address panel] 
	> tmp/migration_precincts.dta [prepared migration data (precinct level)]
	> tmp/ltw18_sb_area.dta 	  [prepared precinct area (precinct level)]
	> rawdata/RWI/02_data/final/election_rwi_red_sb_final [precinct-level rents by RWI]
	> tmp/stadtbez_postalcomp_partyoutc.dta [district-level (Stadtbezirk) outcomes]
	
Output: 
	> newdata/estimation_prep_ltw18 [final precinct-level panel]
	> arc_anly/arc_input/treated_precincts.txt [treated precincts for GIS processing]

Main tasks:		
	> Collapse to precinct-level and compute relevant variables
	> Generate relative time and sample definition for event study

*/


/*------ PRECINCT LEVEL ----------*/

* LOAD: prepared voter/address-level panel
use "$newdata/estimation_prep_ltw18_voter.dta", clear

********************************************************************************
	// Collapse to precinct level & gen relevant variables //
********************************************************************************
**** COLLAPSE: one row per election x precinct, every listed variable is averaged
	local collapse_vars  waehler* wahlber* ungueltige_stimmen ew_* wb_* avg_dur hh_* ///
						 anz* *street_dist* *creased *treat* shr* ///
						 wl_school* sb_size_change
	local collapse_cell  wahl wahl_type wahl_id jahr sb_new stadtbez

	// labelformat(#sourcelabel#) carries the source variable labels over unchanged
	gcollapse (mean) `collapse_vars', by(`collapse_cell') ///
		labelformat(#sourcelabel#) fast

	// show shares with four decimals and declare the precinct x election panel
	format %9.4f shr_*
	xtset sb_new wahl_id

	
**** MERGES 

	// Migration flows (across and within precincts), keyed on precinct x election.
	// Using-only rows are tolerated by assert() but only matched rows are kept.
		merge 1:1 sb_new wahl_id using "$tmp/migration_precincts.dta", ///
			assert(match using) keep(match) nogenerate

	// Precinct area (SE-18): one value per precinct, every row must match
		merge m:1 sb_new using "$tmp/ltw18_sb_area.dta", ///
			assert(match) nogenerate

	// Average rental prices (RWI) by year x precinct; only the rent variable is pulled in
		merge m:1 jahr sb_new using "$rawdata/RWI/02_data/final/election_rwi_red_sb_final", ///
			keepusing(mpreis_flats_rent) assert(match) nogenerate

	// District-level (Stadtbezirk) outcomes by district x election
		merge m:1 stadtbez wahl using "$tmp/stadtbez_postalcomp_partyoutc.dta", ///
			assert(match) nogenerate
		
**** Turnout outcomes (each expressed relative to wahlber_gesamt, still on a 0-1 scale here)

	// polling-place turnout: all voters minus voters with Wahlschein
	generate turnout_urne    = (waehler_gesamt - waehler_mit_wahlschein) / wahlber_gesamt
	// requested postal turnout: eligible persons with Wahlschein
	generate turnout_pos_req = wahlber_mit_wahlschein / wahlber_gesamt
	// overall turnout: sum of the two components above
	generate turnout_tot_req = turnout_urne + turnout_pos_req

	label variable turnout_urne    "Turnout at polling station / eligible pop)"
	label variable turnout_pos_req "Turnout eligible ppl with Wahlschein / eligible pop"
	label variable turnout_tot_req "Total turnout, NOT corrected for postal compliance"

	

**** gen/adjust Variables

 * Logs: each assert stops the run if any value is not strictly positive
	foreach v in street_dist ew_ges {
		assert `v' > 0
		generate ln_`v' = ln(`v')
	}
	label variable ln_street_dist "Log walking distance"
	label variable ln_ew_ges      "Log number of residents"

 * Migration counts normalised by the number of residents
	generate withmig_pop = withmig / ew_ges
	generate outmig_pop  = outmig  / ew_ges
	generate inmig_pop   = inmig   / ew_ges
	label variable withmig_pop "\% within migration"
	label variable outmig_pop  "\% Outmigration"
	label variable inmig_pop   "\% Inmigration"


 * Population groups derived from the source categories
	//	ew_mihi : non-EU foreigners (ew_ausl_else) + EU foreigners (ew_ausl_eu)
	//	          + Germans with a migration background (Deutsche mit MiHi)
	//	ew_dt   : Germans without a migration background
	//	          + Germans with a migration background
	//	Hence:
	//	ew_ausl   = all foreigners
	//	ew_dtmihi = Germans with a migration background
	//	ew_biodt  = Germans without a migration background
	//	and biodt + dtmihi + ausl add up to the total.

	generate ew_ausl   = ew_ausl_eu + ew_ausl_else
	generate ew_dtmihi = ew_mihi - ew_ausl
	generate ew_biodt  = ew_dt - ew_dtmihi

	// eligible voters relative to residents
	generate wb_anteil = wahlber_gesamt / ew_ges
	

 * Binary treatment dummies for every treat_* intensity (e.g. parttreat_simple, fulltreat_simple)
	// part<v> = 1 when the intensity is positive (at least part of the precinct treated)
	foreach v of varlist treat_* {
		generate part`v' = (`v' > 0)
		label variable part`v' "Binary treatment of `v' (treat > 0)"
	}
	// dummies built from distance-type variables are not kept
	capture drop parttreat*dist

	// full<v> = 1 when the intensity equals one (WHOLE precinct treated)
	foreach v of varlist treat_* {
		generate full`v' = (`v' == 1)
		label variable full`v' "Binary treatment of `v' (treat = 1)"
	}
	capture drop fulltreat*dist
	 
*** Outcomes
	// requested postal turnout as a percentage of total requested turnout
	generate share_mail = 100 * turnout_pos_req / turnout_tot_req
	label variable share_mail "Share of Postal votes in total votes"

	
* Rescale Covariates to Percentages
	// A variable is multiplied by 100 only when ALL of its non-missing values lie
	// in [0,1], i.e. when it is stored as a share. Counts such as ew_ges are left
	// untouched because their maximum exceeds 1.
	//
	// Why summarize and not "if inrange(v,0,1) {": the programming command -if-
	// evaluates its expression once, using the FIRST observation only, so the
	// decision would depend on the current sort order and on whether that single
	// row happens to be missing. Checking r(min) and r(max) uses every observation.
	foreach v of varlist wb_* ew_* hh_* *mig_pop {
		quietly summarize `v', meanonly
		if r(N) > 0 & r(min) >= 0 & r(max) <= 1 {
			replace `v' = 100*`v'
		}
	}
	
	
 * Labels used in table output (these replace any labels assigned earlier)

	// distance measures
	label variable del_street_dist   "Avg. change in walking distance (km)"
	label variable street_increased  "Share HH, where RA increased dist"
	label variable street_decreased  "Share HH, where RA decreased dist"

	// residents
	label variable ew_ges        "Number of residents"
	label variable ln_ew_ges     "Log number of residents"
	label variable ew_biodt      "\% native German residents"
	label variable ew_mihi       "\% migrant residents (MiHi+foreigners)"
	label variable ew_dtmihi     "\% non-native German residents"
	label variable ew_ausl       "\% foreign residents"
	label variable ew_ausl_eu    "\% EU foreigners"
	label variable ew_ausl_else  "\% non-EU foreigners"
	label variable ew_dt         "\% German residents"
	label variable ew_ledig      "\% single residents"
	label variable ew_married    "\% married residents"

	// electorate
	label variable wahlber_gesamt "Number of eligible voters"
	label variable wb_anteil      "\% inhabitants eligible to vote"
	label variable wb_dt          "\% Germans in the electorate"
	label variable wb_ausl        "\% EU-foreigners in the electorate"
	label variable wb_18t24       "\% electorate aged 18-24"
	label variable wb_25t34       "\% electorate aged 25-34"
	label variable wb_35t44       "\% electorate aged 35-44"
	label variable wb_45t59       "\% electorate aged 45-59"
	label variable wb_60plus      "\% electorate aged 60+"

	// households and residence
	label variable avg_dur  "Average duration of residence"
	label variable hh_kids  "\% households with children"
	label variable hh_sgl   "\% single-person households"

	// area and rents
	label variable area_sb_ltw18      "Area in qkm"
	label variable mpreis_flats_rent  "Average quoted rent per sqm"

	// turnout outcomes
	label variable turnout_urne     "Polling place turnout"
	label variable turnout_tot_req  "Overall turnout (requested)"
	label variable turnout_pos_req  "Mail-in turnout (requested)"
	
* Gen absolute values of covariates in thousands
	// For every percentage covariate (all non-missing values within [0,100]) build
	// abs_<var> = base * (pct/100) / 1000, where the base is ew_ges for ew_*
	// variables and wahlber_gesamt for wb_* variables. The label is derived from
	// the percentage label by replacing "\%" with "Number of" and appending "(thsd)".
	//
	// The range test uses summarize on the whole variable: the programming
	// command -if- with inrange() would look at the first observation only and
	// thus depend on sort order.
	cap drop abs_*
	foreach v of varlist ew_* {
		// ew_ges is the base count itself; its abs_ version is created after the loops
		if "`v'" == "ew_ges" continue
		quietly summarize `v', meanonly
		if r(N) > 0 & r(min) >= 0 & r(max) <= 100 {
			gen abs_`v' = ew_ges * (`v'/100) / 1000
			local lab : variable label `v'
			local labnew = subinstr("`lab'","\%", "Number of",.)
			local labnew "`labnew' (thsd)"
			lab var abs_`v' "`labnew'"
		}
	}
	foreach v of varlist wb_* {
		quietly summarize `v', meanonly
		if r(N) > 0 & r(min) >= 0 & r(max) <= 100 {
			gen abs_`v' = wahlber_gesamt * (`v'/100) / 1000
			local lab : variable label `v'
			local labnew = subinstr("`lab'","\%", "Number of",.)
			local labnew "`labnew' (thsd)"
			lab var abs_`v' "`labnew'"
		}
	}
	// residents in thousands
	gen 		abs_ew_ges = ew_ges/1000
	lab var 	abs_ew_ges "Number of residents (thsd)"
	
	


********************************************************************************
	// Generate relative time and sample definition for event study //
********************************************************************************

	// Turnout labels as they should appear in the event-study output
	label variable turnout_urne     "Effect on polling place turnout"
	label variable turnout_pos_req  "Effect on mail-in turnout"
	label variable turnout_tot_req  "Effect on total turnout"


	// Move every turnout* variable from the 0-1 scale to 0-100.
	// The assert aborts the run if any value lies outside [0,1] (missing included).
	foreach v of varlist turnout* {
		assert inrange(`v', 0, 1)
		replace `v' = 100 * `v'
	}
	
	
	// fulltottreat100: number of elections in which the precinct is fully reassigned
	bysort sb_new: egen fulltottreat100 = total(fulltreat_simple)
	label variable fulltottreat100 "Total treat_simple==1 by precinct"

	// treat50_simple: election-level dummy for at least half of the precinct reassigned
	// NOTE(review): the condition is >= .5 while the labels below say "> .5" - confirm
	// which boundary is intended.
	generate treat50_simple = (treat_simple >= .5)
	label variable treat50_simple "Binary treatment (treat_simple >.5)"

	// fulltottreat50: number of elections in which treat50_simple equals one
	bysort sb_new: egen fulltottreat50 = total(treat50_simple)
	label variable fulltottreat50 "Total treat_simple> .5 by precinct"


***** DEFINITION 1: Event= first time precinct is FULLY treated (treat_simple=1)	
	capture drop tmp*

 * Core event-study indicators
	// Ei: wahl_id of the first fully treated election (missing if never fully treated).
	// Sorting within precinct on (fulltreat_simple, -wahl_id) places the earliest
	// fulltreat_simple==1 row last, so wahl_id[_N] is the first full treatment.
	generate tmp_wahl_id = -wahl_id
	bysort sb_new (fulltreat_simple tmp_wahl_id): generate Ei = wahl_id[_N] if fulltottreat100 > 0
	label variable Ei "date of treatment,treat=100% reassigned, NT=."
	label values Ei wahl_id

	// K: relative time = elections elapsed since Ei (missing if never treated)
	capture drop K
	generate K = wahl_id - Ei
	label variable K "rel. time, treat=100% reassigned, NT=."

	// D: post-treatment dummy; the Ei!=. term keeps never-treated precincts at 0
	capture drop D
	generate D = (K >= 0 & Ei != .)
	label variable D "post-treatment dummy, treat=100% reassigned, NT=0"


	// cleanctr (precinct level): 1 for precincts with an event date and for
	// precincts with no reassignment at all in any election
	capture drop tmp*
	bysort sb_new: egen tmp = total(treat_simple > 0)
	generate cleanctr = (tmp == 0)
	replace  cleanctr = 1 if !missing(Ei)
	label variable cleanctr "IDs treat=100% & CTRL w/ ZERO reassgn. throughout"
	*tab Ei if cleanctr==1 & wahl_id==1, mis


	// T / NT: treated (event date exists) versus never-treated precincts
	generate T = (Ei != .)
	recode T (0=1)(1=0), generate(NT)
	label variable T  "treated units (Ei!=.)"
	label variable NT "never treated unit (Ei=.)"

	// sb_const_size (precinct level): 1 for never-treated precincts and for treated
	// precincts whose size did not change in the event election (K==0)
	generate sb_const_size = (K == 0 & sb_size_change == 0)
	replace  sb_const_size = 1 if missing(Ei)		// all control precincts are included
	assert !missing(sb_const_size)
	// spread the largest within-precinct value to every row of the precinct
	bysort sb_new (sb_const_size): replace sb_const_size = sb_const_size[_N]

 * Optional sample restrictions
	capture drop tmp*

	// smpl_trim: 0 from the first further full treatment onwards, 1 otherwise
	capture drop smpl_trim
	generate smpl_trim = 1
	// a full treatment in any election other than the event election (100% only)
	generate tmp_other_treats = (fulltreat_simple > 0 & K != 0)
	// replace runs row by row in election order, so a 0 carries forward via [_n-1]
	bysort sb_new (wahl_id): replace smpl_trim = 0 if (tmp_other_treats == 1 | smpl_trim[_n-1] == 0)
	label variable smpl_trim "Obs after 2. treatment dropped, treat=100% reassigned"
	
	
	// All balanced samples below follow one recipe: flag the rows sitting at the
	// two window endpoints, count them per precinct, and keep never-treated
	// precincts plus treated precincts for which both endpoints are observed.

	// smpl_bal: window (-4,2)
	// (author's note: includes FE-17, SE-18: 208 treated units)
	capture drop smpl_bal
	capture drop help*
	bysort sb_new: generate help = (K==-4 | K==2)
	bysort sb_new: egen help2 = total(help)
	generate smpl_bal = (missing(Ei) | help2 == 2)
	label variable smpl_bal "=1 if NT or balanced in t(-4,2), treat=100%"
	tabulate K if smpl_bal==1
	drop help*


	// smpl_bal17: smpl_bal restricted to treated precincts with event date Ei==5
	// (author's note: balanced around FE-17, 173 treated units)
	generate smpl_bal17 = smpl_bal
	replace  smpl_bal17 = 0 if Ei != 5 & !missing(Ei)
	label variable smpl_bal17 "=1 if NT or balanced around FE-17, treat=100%"
	tabulate K if smpl_bal17==1

	// Remaining windows, all WITHOUT smpl_trim (author's notes on coverage):
	//	smpl_bal_tp1: (-4,1) -> FE-17, SE-18, EU-19: 220 treated units
	//	smpl_bal_tm2: (-2,2) -> ME-14, FE-17, SE-18: 218 treated units
	//	smpl_bal_tpm: (-2,1) -> ME-14, FE-17, SE-18, EU-19: 230 treated units
	// Each spec is "suffix lower-endpoint upper-endpoint".
	foreach spec in "tp1 -4 1" "tm2 -2 2" "tpm -2 1" {
		local bal_sfx : word 1 of `spec'
		local bal_lo  : word 2 of `spec'
		local bal_hi  : word 3 of `spec'

		capture drop help*
		bysort sb_new: generate help = (K==`bal_lo' | K==`bal_hi')
		bysort sb_new: egen help2 = total(help)
		generate smpl_bal_`bal_sfx' = (missing(Ei) | help2 == 2)
		label variable smpl_bal_`bal_sfx' "=1 if NT or balanced in t(`bal_lo',`bal_hi'), treat=100%"
		tabulate K if smpl_bal_`bal_sfx'==1
		drop help*
	}
	
	// smpl_bal_tp0: balanced panel around t in (-4,0) WITHOUT smpl_trim
	//	Keeps never-treated precincts and treated precincts observed at both
	//	K==-4 and K==0. The lower endpoint used to be coded as K==-2 (left over
	//	from the (-2,1) block), which contradicted this sample's name and label.
	//	NOTE(review): the treated-unit count previously quoted here was copied
	//	from the (-2,1) sample; re-read it from the tabulate output below.
	cap drop help*
	bys sb_new: gen help = (K==-4 | K==0)
	bys sb_new: egen help2 = total(help)
	gen 	smpl_bal_tp0 = (missing(Ei) | help2 == 2)
	lab var smpl_bal_tp0 "=1 if NT or balanced in t(-4,0), treat=100%"
	tab K if smpl_bal_tp0==1
	drop help*	
	
	
	// smpl_trim_dist: smpl_trim restricted to never-treated precincts and treated
	// precincts whose walking distance INCREASED in the event election (K==0)
	// NOTE(review): a missing del_street_dist at K==0 would count as an increase,
	// because missing compares greater than any number - confirm it never occurs.
	cap drop dincreas
	gen 	 dincreas = del_street_dist>0 if K==0 // flag at K==0: distance increased
	// missing sorts last, so [1] is the K==0 value, spread to all rows of the precinct
	bys sb_new (dincreas): replace dincreas=dincreas[1]
	replace	 dincreas = 1 if Ei==.				 // never-treated precincts are always kept
	lab var dincreas "id preincts with dist increase in K=0 and NT"
	
	// same construction for precincts whose walking distance DECREASED at K==0
	cap drop ddecreas
	gen 	 ddecreas = del_street_dist<0 if K==0 // flag at K==0: distance decreased
	bys sb_new (ddecreas): replace ddecreas=ddecreas[1]
	replace	 ddecreas = 1 if Ei==.
	lab var ddecreas "id precincts with dist decrease in K=0 and NT"	

	gen 	smpl_trim_dist = (smpl_trim==1&dincreas==1)
	lab var smpl_trim_dist "=1 if NT or dist increase, treat=100%, 2. dropped"
	clonevar smpl_trim_dist_up=smpl_trim_dist		// alias with explicit "up" suffix
	
	// label corrected: this sample selects distance DEcreases
	gen 	smpl_trim_dist_dwn = (smpl_trim==1&ddecreas==1)
	lab var smpl_trim_dist_dwn "=1 if NT or dist decrease, treat=100%, 2. dropped"	
	
***** DEFINITION 2: Event= first time precinct is >50% reassigned (treat_simple>.5)	
	capture drop tmp*

* Core event-study indicators
	// Ei50: wahl_id of the first election with treat50_simple==1 (missing if none).
	// Sorting within precinct on (treat50_simple, -wahl_id) places that row last.
	generate tmp_wahl_id = -wahl_id
	bysort sb_new (treat50_simple tmp_wahl_id): generate Ei50 = wahl_id[_N] if fulltottreat50 > 0
	label variable Ei50 "date of treatment,treat=50% reassigned, NT=."
	label values Ei50 wahl_id

	// K50: relative time = elections elapsed since Ei50 (missing if never treated)
	capture drop K50
	generate K50 = wahl_id - Ei50
	label variable K50 "rel. time, treat=50% reassigned, NT=."

	// D50: post-treatment dummy; the Ei50!=. term keeps never-treated precincts at 0
	capture drop D50
	generate D50 = (K50 >= 0 & Ei50 != .)
	label variable D50 "post-treatment dummy, treat=50% reassigned, NT=0"

	// cleanctr50 (precinct level): 1 for precincts with an Ei50 event date and for
	// precincts with no reassignment at all in any election
	capture drop tmp*
	bysort sb_new: egen tmp = total(treat_simple > 0)
	generate cleanctr50 = (tmp == 0)
	replace  cleanctr50 = 1 if !missing(Ei50)
	label variable cleanctr50 "IDs treat=50% & CTRL w/ ZERO reassgn. throughout"
	*tab Ei if cleanctr==1 & wahl_id==1, mis

 * Optional sample restrictions
	capture drop tmp*

	// smpl_trim50: 0 from the first further >50% treatment onwards, 1 otherwise
	capture drop smpl_trim50
	generate smpl_trim50 = 1
	// a >50% treatment in any election other than the event election
	generate tmp_other_treats = (treat50_simple > 0 & K50 != 0)
	// replace runs row by row in election order, so a 0 carries forward via [_n-1]
	bysort sb_new (wahl_id): replace smpl_trim50 = 0 if (tmp_other_treats == 1 | smpl_trim50[_n-1] == 0)
	label variable smpl_trim50 "Obs after 2. treatment dropped, treat=50% reassigned"
	
	
***** DEFINITION 3: Event= LARGEST Reassignment (treat_simple>0)	
	capture drop tmp*

* Core event-study indicators
	// Ei_max: wahl_id of the election with the largest treat_simple in the precinct
	// (the earliest one in case of ties); missing if the precinct maximum is not > 0.
	// Sorting within precinct on (treat_simple, -wahl_id) places that row last.
	capture drop Ei_max
	generate tmp_wahl_id = -wahl_id
	bysort sb_new: egen tmp_max = max(treat_simple)
	bysort sb_new (treat_simple tmp_wahl_id): generate Ei_max = wahl_id[_N] if tmp_max > 0
	label variable Ei_max "date of treatment,treat=max(treat_simple) reassigned, NT=."

	// K_max: relative time = elections elapsed since Ei_max (missing if never treated)
	capture drop K_max
	generate K_max = wahl_id - Ei_max
	label variable K_max "rel. time, treat=max(treat_simple) reassigned, NT=."

	// D_max: post-treatment dummy; the Ei_max!=. term keeps never-treated precincts at 0
	capture drop D_max
	generate D_max = (K_max >= 0 & Ei_max != .)
	label variable D_max "post-treatment dummy, treat=max(treat_simple) reassigned, NT=0"

 * Optional sample restrictions
	capture drop tmp*

	// smpl_trim_max: 0 from the first later reassignment onwards, 1 otherwise
	capture drop smpl_trim_max
	generate smpl_trim_max = 1
	// NOTE: unlike the other definitions, ANY positive treatment after K_max=0 counts
	generate tmp_other_treats = (treat_simple > 0 & K_max > 0)
	// replace runs row by row in election order, so a 0 carries forward via [_n-1]
	bysort sb_new (wahl_id): replace smpl_trim_max = 0 if (tmp_other_treats == 1 | smpl_trim_max[_n-1] == 0)
	label variable smpl_trim_max "Obs after 2. treatment dropped, treat=max(treat_simple) reassigned"
	
	capture drop tmp*
* Store the final precinct-level panel (storage types minimised first)
	compress
	save "$newdata/estimation_prep_ltw18.dta", replace


** EXPORT files to ArcGIS for mapping & processing

 * Precinct id and treated flag T, taken from the rows with wahl_id==1,
 * used to build the locally matched sample in ArcGIS
	export delimited sb_new T using "$arc_anly/arc_input/treated_precincts.txt" if wahl_id==1, replace
